In recognition of the rapid pace of analytical tools and approaches to EWAS and Illumina 450k arrays, we conducted an analysis of our dataset guided by the recommendations of Maksimovic et al. (http://f1000research.com/articles/5-1281/v3). This Rmarkdown document contains the full code used to generate the plots and tables presented.
# package loading
library(data.table)
library(dplyr)
library(limma)
library(minfi)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(IlluminaHumanMethylation450kmanifest)
library(RColorBrewer)
library(missMethyl)
library(matrixStats)
library(Gviz)
Loading required package: grid
library(DMRcate)
Loading required package: DSS
Loading required package: bsseq
package 'bsseq' was built under R version 3.4.1
Attaching package: 'bsseq'
The following object is masked from 'package:minfi':
getMeth
Loading required package: splines
Loading required package: DMRcatedata
library(stringr)
library(data.table)
library(dplyr)
sessionInfo()
R version 3.4.0 (2017-04-21)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: OS X El Capitan 10.11.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] splines grid stats4 parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.2.0 DMRcate_1.12.1 DMRcatedata_1.12.0 DSS_2.16.0
[5] bsseq_1.12.2 Gviz_1.20.0 missMethyl_1.10.0 RColorBrewer_1.1-2
[9] IlluminaHumanMethylation450kmanifest_0.4.0 IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0 BiocInstaller_1.26.1 minfi_1.22.1
[13] bumphunter_1.16.0 locfit_1.5-9.1 iterators_1.0.8 foreach_1.4.3
[17] Biostrings_2.44.2 XVector_0.16.0 SummarizedExperiment_1.6.4 DelayedArray_0.2.7
[21] matrixStats_0.52.2 Biobase_2.36.2 GenomicRanges_1.28.5 GenomeInfoDb_1.12.2
[25] IRanges_2.10.3 S4Vectors_0.14.4 BiocGenerics_0.22.0 limma_3.32.6
[29] dplyr_0.7.3 data.table_1.10.4
loaded via a namespace (and not attached):
[1] backports_1.1.0 Hmisc_4.0-3 AnnotationHub_2.8.2 plyr_1.8.4
[5] lazyeval_0.2.0 BiocParallel_1.10.1 ggplot2_2.2.1.9000 digest_0.6.12
[9] ensembldb_2.0.4 htmltools_0.3.6 GO.db_3.4.1 magrittr_1.5
[13] checkmate_1.8.3 memoise_1.1.0 BSgenome_1.44.1 cluster_2.0.6
[17] annotate_1.54.0 R.utils_2.5.0 siggenes_1.50.0 colorspace_1.3-2
[21] blob_1.1.0 BiasedUrn_1.07 RCurl_1.95-4.8 genefilter_1.58.1
[25] bindr_0.1 GEOquery_2.42.0 survival_2.41-3 VariantAnnotation_1.22.3
[29] glue_1.1.1 ruv_0.9.6 registry_0.3 gtable_0.2.0
[33] zlibbioc_1.22.0 scales_0.5.0.9000 DBI_0.7 rngtools_1.2.4
[37] Rcpp_0.12.12 xtable_1.8-2 htmlTable_1.9 foreign_0.8-69
[41] bit_1.1-12 mclust_5.3 preprocessCore_1.38.1 Formula_1.2-2
[45] htmlwidgets_0.9 httr_1.3.1 acepack_1.4.1 pkgconfig_2.0.1
[49] reshape_0.8.7 XML_3.98-1.9 R.methodsS3_1.7.1 nnet_7.3-12
[53] rlang_0.1.2 AnnotationDbi_1.38.2 munsell_0.4.3 tools_3.4.0
[57] RSQLite_2.0 yaml_2.1.14 org.Hs.eg.db_3.4.1 knitr_1.17
[61] bit64_0.9-7 beanplot_1.2 methylumi_2.22.0 AnnotationFilter_1.0.0
[65] bindrcpp_0.2 nlme_3.1-131 doRNG_1.6.6 mime_0.5
[69] nor1mix_1.2-3 R.oo_1.21.0 biomaRt_2.32.1 compiler_3.4.0
[73] curl_2.8.1 interactiveDisplayBase_1.14.0 tibble_1.3.4 statmod_1.4.30
[77] stringi_1.1.5 GenomicFeatures_1.28.5 lattice_0.20-35 IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
[81] ProtGenerics_1.8.0 Matrix_1.2-11 permute_0.9-4 multtest_2.32.0
[85] bitops_1.0-6 httpuv_1.3.5 rtracklayer_1.36.4 R6_2.2.2
[89] latticeExtra_0.6-28 gridExtra_2.3 codetools_0.2-15 dichromat_2.0-0
[93] MASS_7.3-47 gtools_3.5.0 assertthat_0.2.0 openssl_0.9.7
[97] pkgmaker_0.22 GenomicAlignments_1.12.2 Rsamtools_1.28.0 GenomeInfoDbData_0.99.0
[101] quadprog_1.5-5 rpart_4.1-11 base64_2.0 illuminaio_0.18.0
[105] biovizBase_1.24.0 shiny_1.0.5 base64enc_0.1-3
head(detP %>% DT::datatable())
$x
$x$filter
[1] "none"
$x$data
$x$container
[1] "<table class=\"display\">\n <thead>\n <tr>\n <th> </th>\n <th>A31241</th>\n <th>A07627</th>\n <th>A16016</th>\n <th>A12853</th>\n <th>A41135</th>\n <th>A18307</th>\n <th>B08456</th>\n <th>B55270</th>\n <th>B62321</th>\n <th>A33328</th>\n <th>B00506</th>\n <th>B09297</th>\n <th>B54759</th>\n <th>A39244</th>\n <th>B62382</th>\n <th>A19082</th>\n <th>B09987</th>\n <th>A18725</th>\n <th>A08725</th>\n <th>A35720</th>\n <th>B09869</th>\n <th>B50819</th>\n <th>B53477</th>\n <th>B09936</th>\n <th>B03613</th>\n <th>B52639</th>\n <th>B08543</th>\n <th>B03745</th>\n <th>A33401</th>\n <th>A10602</th>\n <th>A40627</th>\n <th>A32982</th>\n <th>A36949</th>\n <th>A34182</th>\n <th>B50075</th>\n <th>B50263</th>\n <th>B62253</th>\n <th>B09582</th>\n <th>B53330</th>\n <th>A15072</th>\n <th>A16531</th>\n <th>B08035</th>\n <th>A33134</th>\n <th>B06394</th>\n <th>B51657</th>\n <th>A26886</th>\n <th>A34303</th>\n <th>B54178</th>\n <th>B07227</th>\n <th>A23467</th>\n <th>B50299</th>\n <th>A39840</th>\n <th>A19654</th>\n <th>B51013</th>\n <th>A37706</th>\n <th>A35050</th>\n <th>B53321</th>\n <th>A35358</th>\n <th>B09758</th>\n <th>A11460</th>\n <th>A11816</th>\n <th>A07358</th>\n <th>A34569</th>\n <th>B03416</th>\n <th>B62049</th>\n <th>B54450</th>\n <th>A13635</th>\n <th>A09636</th>\n <th>A27829</th>\n <th>B05865</th>\n <th>B08877</th>\n <th>A22227</th>\n <th>B53679</th>\n <th>A33699</th>\n <th>A35174</th>\n <th>B04000</th>\n <th>A09631</th>\n <th>A13799</th>\n <th>B55125</th>\n <th>A10401</th>\n <th>B07831</th>\n <th>B09353</th>\n <th>B54602</th>\n <th>A15597</th>\n <th>A08054</th>\n <th>A28326</th>\n <th>B52341</th>\n <th>B55335</th>\n <th>A40764</th>\n <th>B06018</th>\n <th>B53779</th>\n <th>A08424</th>\n <th>B50435</th>\n <th>B50395</th>\n <th>B51283</th>\n <th>A32642</th>\n <th>A17607</th>\n <th>A19764</th>\n <th>B51250</th>\n <th>A23005</th>\n <th>A25857</th>\n <th>B05695</th>\n <th>A07449</th>\n <th>B54644</th>\n <th>B06778</th>\n <th>B07107</th>\n <th>A20138</th>\n <th>B06410</th>\n <th>B08176</th>\n <th>B09879</th>\n <th>A11512</th>\n <th>A13545</th>\n <th>A23510</th>\n <th>A05706</th>\n <th>B53285</th>\n <th>A15367</th>\n <th>A35751</th>\n <th>B07230</th>\n <th>A38108</th>\n <th>B52997</th>\n <th>A12279</th>\n <th>B00382</th>\n <th>A29901</th>\n <th>A35164</th>\n <th>A30503</th>\n <th>B53325</th>\n <th>B52626</th>\n <th>B54553</th>\n <th>A36607</th>\n <th>A13050</th>\n <th>A05761</th>\n <th>B53990</th>\n <th>B51927</th>\n <th>B06834</th>\n <th>A08883</th>\n <th>B52273</th>\n <th>A28126</th>\n <th>B03525</th>\n <th>A28986</th>\n <th>B62212</th>\n <th>B00796</th>\n <th>B00858</th>\n <th>B05946</th>\n <th>B54521</th>\n <th>A10596</th>\n </tr>\n </thead>\n</table>"
$x$options
$x$options$columnDefs
$x$options$columnDefs[[1]]
$x$options$columnDefs[[1]]$className
[1] "dt-right"
$x$options$columnDefs[[1]]$targets
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
[57] 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
[113] 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
$x$options$columnDefs[[2]]
$x$options$columnDefs[[2]]$orderable
[1] FALSE
$x$options$columnDefs[[2]]$targets
[1] 0
$x$options$order
list()
$x$options$autoWidth
[1] FALSE
$x$options$orderClasses
[1] FALSE
attr(,"escapeIdx")
[1] "true"
$x$selection
$x$selection$mode
[1] "multiple"
$x$selection$selected
NULL
$x$selection$target
[1] "row"
attr(,"colnames")
[1] " " "A31241" "A07627" "A16016" "A12853" "A41135" "A18307" "B08456" "B55270" "B62321" "A33328" "B00506" "B09297" "B54759" "A39244" "B62382" "A19082" "B09987" "A18725" "A08725" "A35720" "B09869" "B50819" "B53477" "B09936"
[26] "B03613" "B52639" "B08543" "B03745" "A33401" "A10602" "A40627" "A32982" "A36949" "A34182" "B50075" "B50263" "B62253" "B09582" "B53330" "A15072" "A16531" "B08035" "A33134" "B06394" "B51657" "A26886" "A34303" "B54178" "B07227"
[51] "A23467" "B50299" "A39840" "A19654" "B51013" "A37706" "A35050" "B53321" "A35358" "B09758" "A11460" "A11816" "A07358" "A34569" "B03416" "B62049" "B54450" "A13635" "A09636" "A27829" "B05865" "B08877" "A22227" "B53679" "A33699"
[76] "A35174" "B04000" "A09631" "A13799" "B55125" "A10401" "B07831" "B09353" "B54602" "A15597" "A08054" "A28326" "B52341" "B55335" "A40764" "B06018" "B53779" "A08424" "B50435" "B50395" "B51283" "A32642" "A17607" "A19764" "B51250"
[101] "A23005" "A25857" "B05695" "A07449" "B54644" "B06778" "B07107" "A20138" "B06410" "B08176" "B09879" "A11512" "A13545" "A23510" "A05706" "B53285" "A15367" "A35751" "B07230" "A38108" "B52997" "A12279" "B00382" "A29901" "A35164"
[126] "A30503" "B53325" "B52626" "B54553" "A36607" "A13050" "A05761" "B53990" "B51927" "B06834" "A08883" "B52273" "A28126" "B03525" "A28986" "B62212" "B00796" "B00858" "B05946" "B54521" "A10596"
attr(,"rownames")
[1] TRUE
$width
NULL
$height
NULL
$sizingPolicy
$sizingPolicy$defaultWidth
NULL
$sizingPolicy$defaultHeight
NULL
$sizingPolicy$padding
NULL
$sizingPolicy$viewer
$sizingPolicy$viewer$defaultWidth
NULL
$sizingPolicy$viewer$defaultHeight
NULL
$sizingPolicy$viewer$padding
NULL
$sizingPolicy$viewer$fill
[1] TRUE
$sizingPolicy$viewer$suppress
[1] FALSE
$sizingPolicy$viewer$paneHeight
NULL
$sizingPolicy$browser
$sizingPolicy$browser$defaultWidth
NULL
$sizingPolicy$browser$defaultHeight
NULL
$sizingPolicy$browser$padding
NULL
$sizingPolicy$browser$fill
[1] FALSE
$sizingPolicy$knitr
$sizingPolicy$knitr$defaultWidth
[1] "100%"
$sizingPolicy$knitr$defaultHeight
[1] "auto"
$sizingPolicy$knitr$figure
[1] FALSE
$dependencies
$dependencies[[1]]
List of 10
$ name : chr "dt-core"
$ version : chr "1.10.12"
$ src :List of 1
..$ file: chr "/Library/Frameworks/R.framework/Versions/3.4/Resources/library/DT/htmlwidgets/lib/datatables"
$ meta : NULL
$ script : chr "js/jquery.dataTables.min.js"
$ stylesheet: chr [1:2] "css/jquery.dataTables.min.css" "css/jquery.dataTables.extra.css"
$ head : NULL
$ attachment: NULL
$ package : NULL
$ all_files : logi FALSE
- attr(*, "class")= chr "html_dependency"
$elementId
NULL
Generally we want well under 5% of sites failing. Error rates for our data are very low.
barplot(colMeans(detP), col=pal[factor(targets$Sample_Group)], las=2,
cex.names=0.8, ylab="Mean detection p-values")
abline(h=0.05,col="red")
legend("topleft", legend=levels(factor(targets$Sample_Group)), fill=pal,
bg="white")
barplot(colMeans(detP), col=pal[factor(targets$Sample_Group)], las=2,
cex.names=0.8, ylim=c(0,0.002), ylab="Mean detection p-values")
abline(h=0.05,col="red")
legend("topleft", legend=levels(factor(targets$Sample_Group)), fill=pal,
bg="white")
# normalize the data; this results in a GenomicRatioSet object
mSetSq <- preprocessQuantile(rgSet)
[preprocessQuantile] Mapping to genome.
[preprocessQuantile] Fixing outliers.
[preprocessQuantile] Quantile normalizing.
# create a MethylSet object from the raw data for plotting
mSetRaw <- preprocessRaw(rgSet)
# visualise what the data looks like before and after normalization
targets <- targets[match(mSetSq$Sample_Name, targets$Sample),]
par(mfrow=c(1,2))
densityPlot(rgSet, sampGroups=targets$Sample_Group,main="Raw", legend=FALSE)
densityPlot(getBeta(mSetSq), sampGroups=targets$Sample_Group,
main="Normalized", legend=FALSE)
MDS plotting reveals two distinct groups, which are revealed by labeling for different factors, to be gender
# MDS plots to look at largest sources of variation
# first, line up targets and mSetSq
targets <- targets[match(mSetSq$Sample_Name, targets$Sample),]
par(mfrow=c(2,2))
plotMDS(getM(mSetSq), top=1000, gene.selection="common",
col=pal[factor(targets$Sample_Group)])
legend("top", legend=levels(factor(targets$Sample_Group)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSq), top=1000, gene.selection="common",
col=pal[factor(targets$Gender)])
legend("top", legend=levels(factor(targets$Gender)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSq), top=1000, gene.selection="common",
col=pal[factor(targets$Ethnicity)])
legend("top", legend=levels(factor(targets$Ethnicity)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSq), top=1000, gene.selection="common",
col=pal[factor(targets$Sample_Plate)])
legend("top", legend=levels(factor(targets$Sample_Plate)), text.col=pal,
bg="white", cex=0.7)
# MDS plots to look at largest sources of variation
# now with the filtered set
# again, ensure targets and mSetSqFlt line up
targets <- targets[match(mSetSqFlt$Sample_Name, targets$Sample),]
par(mfrow=c(2,2))
plotMDS(getM(mSetSqFlt), top=1000, gene.selection="common",
col=pal[factor(targets$Sample_Group)])
legend("top", legend=levels(factor(targets$Sample_Group)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSqFlt), top=1000, gene.selection="common",
col=pal[factor(targets$Gender)])
legend("top", legend=levels(factor(targets$Gender)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSqFlt), top=1000, gene.selection="common",
col=pal[factor(targets$Ethnicity)])
legend("top", legend=levels(factor(targets$Ethnicity)), text.col=pal,
bg="white", cex=0.7)
plotMDS(getM(mSetSqFlt), top=1000, gene.selection="common",
col=pal[factor(targets$Sample_Plate)])
legend("top", legend=levels(factor(targets$Sample_Plate)), text.col=pal,
bg="white", cex=0.7)
# calculate M-values for statistical analysis
mVals <- getM(mSetSqFlt)
#head(mVals[,1:5])
bVals <- getBeta(mSetSqFlt)
#head(bVals[,1:5])
The approach used in the manuscript
summary(decideTests(fit2))
Case-Control
-1 19
0 360931
1 101
# get the table of results for the first contrast
ann450kSub <- ann450k[match(rownames(mVals),ann450k$Name),
c(1:4,12:19,24:ncol(ann450k))]
DMPs <- topTable(fit2, num=Inf, coef=1, genelist=ann450kSub)
head(DMPs, n=50)
# get beta values
bVals <- getBeta(mSetSq)
# plot the top 16 most significantly differentially methylated CpGs
par(mfrow=c(4,4))
sapply(rownames(DMPs)[1:16], function(cpg){
plotCpg(bVals, cpg=cpg, pheno=targets$Sample_Group, ylab = "Beta values")
})
$cg09249404
NULL
$cg26250086
NULL
$cg16092017
NULL
$cg18901140
NULL
$cg09935388
NULL
$cg00636368
NULL
$cg06168149
NULL
$cg09256832
NULL
$cg21390082
NULL
$cg11067714
NULL
$cg00058163
NULL
$cg05567511
NULL
$cg01257889
NULL
$cg22623319
NULL
$cg18146737
NULL
$cg03775416
NULL
The probe-wise test did not find much of anything, so let’s try the dmrcate package to find regions of differential methylation
I plot the top hit, which looks virtually identical between case and control
Test for difference in variance between groups
Nothing looks interesting
par(mfrow=c(4,4))
sapply(rownames(topDV)[1:10], function(cpg){
plotCpg(bVals, cpg=cpg, pheno=targets$Sample_Group,
ylab = "Beta values")
})
$cg14361458
NULL
$cg14501061
NULL
$cg12821876
NULL
$cg00110609
NULL
$cg05756220
NULL
$cg12936469
NULL
$cg00879475
NULL
$cg06342195
NULL
$cg26117025
NULL
$cg02049404
NULL
Largely the same results I believe. But reviewers shouldn’t have any issue with the analysis pipeline.